/* Creates csds data for Matlab*/

* Turn off output paging so the script runs without pausing.
set more off
* Load the raw survey data, replacing whatever is in memory.
* The path uses a forward slash (accepted by Stata on every platform) to match
* the other file paths in this script; a backslash only works on Windows.
use "Raw Data/csds_data_raw.dta", clear

***********************************Covariates*****************************
* --- In-place recodes of existing survey variables ---
* z11* items: code 2 becomes 0 so the items can be summed into an index.
recode z11? (2=0)
* z3: code 9 is treated as missing.
recode z3 (9=.)
label variable z3 "education"
* z2: 1 -> 0 and 2 -> 1, so the variable reads as a female indicator.
recode z2 (1=0) (2=1)
label variable z2 "female"
* z6: codes 3 through 8 are pooled into a single category 9.
recode z6 (3/8 = 9)
label variable z6 "religion"

* --- Derived covariates ---
* NOTE: the creation order of the variables below is relied upon by the
* asset_index-interview_8 variable ranges used later in this script.
* Row sum of the items z11a through z11m.
egen asset_index = rsum(z11a-z11m)
generate rural = (z8 == 1)
generate nonlit = (z3 == 1)
generate scst = inlist(z5a, 1, 2)

* Coding for Matlab dataset: one indicator variable per category
tabulate z5a, generate(caste_)
tabulate z6, generate(religion_)
tabulate z3, generate(educ_)
tabulate z8, generate(location_)
tabulate e1, generate(interview_)

* Dropping observations with missing covariates:
* an observation is removed if any one of the listed variables is missing.
drop if missing(z1, z2, z3, z5a, z6, z8, asset_index, z13, e1)

* Merging sample data
* Many survey observations map to one row of the AC sample file, matched on
* state_name and ac_num. Only matched observations are retained and no
* merge-indicator variable is created.
merge m:1 state_name ac_num using "Analysis Data/AC_est_sample.dta", keep(match) nogenerate

* One indicator variable per distinct value of num_stations.
tabulate num_stations, generate(num_stations_)

*Outcomes
* Issue indicators: 1 if q5 equals the given code, 0 otherwise; left missing
* when q5 is 98.
* NOTE(review): a system-missing q5 satisfies q5!=98 and is therefore coded 0,
* not missing -- confirm this is intended.
gen issue_price = (q5==1) if q5!=98
gen issue_corrupt = (q5==2) if q5!=98
gen issue_unemp = (q5==3) if q5!=98
gen issue_dev = (q5==4) if q5!=98

* Party indicators: 1 if the party code given in the q15 item equals the party
* code in q1a, 0 otherwise. Each indicator is defined only where its OWN q15
* item is below 90 (codes of 90 and above, and missing values, are left missing).
gen party_admin = (q15a==q1a) if q15a<90
gen party_relig = (q15b==q1a) if q15b<90
* Fix: the restriction previously tested q15b here (copy-paste slip), so the
* sample for party_security was determined by the wrong item.
gen party_security = (q15c==q1a) if q15c<90
gen party_leader = (q15d==q1a) if q15d<90
gen party_gifts = (q15e==q1a) if q15e<90

***************************************************************************
* Build a temporary file holding one row per (state_name, pc_num) that appears
* in the vote-buying data. The data currently in memory are set aside with
* preserve and brought back with restore.
tempfile vb_samp
preserve
* Forward slash for portability and for consistency with the later merge that
* reads this same file; a backslash path only resolves on Windows.
use "Analysis Data/votebuying.dta", clear
* Keep the first row of each (state_name, pc_num) group.
bysort state_name pc_num: keep if _n == 1
keep state_name pc_num
save `vb_samp', replace
restore

***********************************************************************************
*Voting
* Translate the numeric party code in q1a into the text label used as a merge
* key below. The code sets are mutually exclusive; any code without an explicit
* label falls back to its number rendered as text.
generate party_name = ""
replace party_name = "NDA" if inlist(q1a,2,10,46,86,48) //no RSP, LJP, and AD in sample
replace party_name = "UPA" if inlist(q1a,1,6,19,35) //no RLD in sample
replace party_name = "BSP" if q1a == 3
replace party_name = "TRS" if q1a == 7
replace party_name = "YSRCP" if q1a == 8
replace party_name = "AAAP" if q1a == 21
replace party_name = "AJSUP" if q1a == 34
replace party_name = "JVM" if q1a == 36
replace party_name = "JD(S)" if q1a == 37
replace party_name = "BJD" if q1a == 58
replace party_name = string(q1a) if party_name == ""

* Flag observations whose (state_name, pc_num) appears in the vote-buying sample.
merge m:1 state_name pc_num using `vb_samp', keep(master match)
generate vb_samp = (_merge == 3)
drop _merge

* An observation matches here when its (state_name, pc_num, party_name)
* combination is present in the vote-buying file.
merge m:1 state_name pc_num party_name using "Analysis Data/votebuying.dta", keepusing(state_name) keep(master match)
generate voted_votebuyer = (_merge == 3)
drop _merge

* Set the outcome to missing when q1a is 97, or when the observation lies
* outside the vote-buying sample.
replace voted_votebuyer = . if q1a == 97 | vb_samp == 0
drop vb_samp

**********************************Data for matlab******************************
* Observation weight: the inverse of the number of observations sharing the
* same state_name and ac_num.
bysort state_name ac_num: generate num_survey = _N
generate wgt2 = 1/num_survey

generate constant = 1

* One indicator variable per distinct poll_date value.
tabulate poll_date, generate(poll_date_)

* Restrict to the variables that are exported.
* The ? wildcard matches exactly one character, so only indicators with a
* single-character suffix are retained for poll_date_ and num_stations_.
keep state_name ac_num constant poll_date_? issue_price-issue_dev party_admin-party_gifts voted_votebuyer num_stations_? asset_index-interview_8 z1 z2 z13 treat station_id1 station_id2 wgt2

*Weighting observations
* Each listed variable is scaled by the square root of the weight.
* NOTE(review): the poll_date_ indicators are exported but are not scaled
* here -- confirm that is intended.
foreach v of varlist constant treat issue_price-issue_dev party_admin-party_gifts voted_votebuyer num_stations_? asset_index-interview_8 z1 z2 z13 {
    replace `v' = `v' * sqrt(wgt2)
}

* Write the result as comma-separated text, exporting numeric codes rather
* than value labels.
outsheet using "Table 4/table4_data.csv", comma replace nolabel
